Completed first cut of Xen support for grant tables.
The device drivers now need modifying to use them.
if ( unlikely((count_info & PGC_count_mask) == 0) ||
unlikely(e == NULL) || unlikely(!get_domain(e)) )
return 0;
- rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW);
+ rc = gnttab_try_map(
+ e, d, pfn, (l1v & _PAGE_RW) ? GNTTAB_MAP_RW : GNTTAB_MAP_RO);
put_domain(e);
return rc;
}
static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
{
- struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
unsigned long l1v = l1_pgentry_val(l1e);
+ unsigned long pfn = l1_pgentry_to_pagenr(l1e);
+ struct pfn_info *page = &frame_table[pfn];
struct domain *e = page->u.inuse.domain;
- if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
+ if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) )
return;
if ( unlikely(e != d) )
* mappings and which unmappings are counted via the grant entry, but
* really it doesn't matter as privileged domains have carte blanche.
*/
- if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) )
+ if ( likely(gnttab_try_map(e, d, pfn, (l1v & _PAGE_RW) ?
+ GNTTAB_UNMAP_RW : GNTTAB_UNMAP_RO)) )
return;
/* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
}
struct domain *d = current, *nd, *e;
u32 x, y;
domid_t domid;
+ grant_ref_t gntref;
switch ( cmd )
{
}
break;
+ case MMUEXT_TRANSFER_PAGE:
+ /* Transfer ownership of 'page' to the domain named in val[31:16],
+ * via the grant reference encoded in (val[15:8],ptr[9:2]). */
+ domid = (domid_t)(val >> 16);
+ gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
+
+ if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
+ unlikely(!pfn_is_ram(pfn)) ||
+ unlikely((e = find_domain_by_id(domid)) == NULL) )
+ {
+ MEM_LOG("Bad frame (%08lx) or bad domid (%d).\n", pfn, domid);
+ okay = 0;
+ break;
+ }
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while there is just one
+ * benign reference to the page (PGC_allocated). If that reference
+ * disappears then the deallocation routine will safely spin.
+ */
+ nd = page->u.inuse.domain;
+ y = page->count_info;
+ do {
+ x = y;
+ if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+ (1|PGC_allocated)) ||
+ unlikely(nd != d) )
+ {
+ MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
+ " caf=%08x, taf=%08x\n", page_to_pfn(page),
+ d, d->domain, nd, x, page->u.inuse.type_info);
+ spin_unlock(&d->page_alloc_lock);
+ put_domain(e);
+ okay = 0;
+ break;
+ }
+ __asm__ __volatile__(
+ LOCK_PREFIX "cmpxchg8b %2"
+ : "=d" (nd), "=a" (y),
+ "=m" (*(volatile u64 *)(&page->count_info))
+ : "0" (d), "1" (x), "c" (NULL), "b" (x) );
+ }
+ while ( unlikely(nd != d) || unlikely(y != x) );
+
+ /*
+ * NB. The error path above breaks out of the do-while only, with the
+ * lock already dropped and 'e' already released. Bail out of the case
+ * here rather than falling through to the unlink below (which would
+ * also unlock d->page_alloc_lock a second time).
+ */
+ if ( unlikely(!okay) )
+ break;
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now anonymous), so
+ * no one else is spinning to try to delete this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ spin_lock(&e->page_alloc_lock);
+
+ /* Check that 'e' will accept the page and has reservation headroom. */
+ ASSERT(e->tot_pages <= e->max_pages);
+ if ( unlikely(e->tot_pages == e->max_pages) ||
+ unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
+ {
+ MEM_LOG("Transferee has no reservation headroom (%ld,%ld), or "
+ "provided a bad grant ref.\n", e->tot_pages, e->max_pages);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ okay = 0;
+ break;
+ }
+
+ /* Okay, add the page to 'e'. */
+ if ( unlikely(e->tot_pages++ == 0) )
+ get_knownalive_domain(e);
+ list_add_tail(&page->list, &e->page_list);
+ page->u.inuse.domain = e;
+
+ spin_unlock(&e->page_alloc_lock);
+
+ /* Transfer is all done: tell the guest about its new page frame. */
+ gnttab_notify_transfer(e, gntref, pfn);
+
+ put_domain(e);
+ break;
+
+
case MMUEXT_REASSIGN_PAGE:
if ( unlikely(!IS_PRIV(d)) )
{
grant_ref_t ref;
u16 pin_flags;
struct domain *ld, *rd;
- u16 sflags, prev_sflags;
+ u16 sflags;
active_grant_entry_t *act;
grant_entry_t *sha;
long rc = 0;
unsigned long frame;
+ /*
+ * We bound the number of times we retry CMPXCHG on memory locations
+ * that we share with a guest OS. The reason is that the guest can modify
+ * that location at a higher rate than we can read-modify-CMPXCHG, so
+ * the guest could cause us to livelock. There are a few cases
+ * where it is valid for the guest to race our updates (e.g., to change
+ * the GTF_readonly flag), so we allow a few retries before failing.
+ */
+ int retries = 0;
+
ld = current;
/* Bitwise-OR avoids short-circuiting which screws control flow. */
for ( ; ; )
{
- u32 scombo, prev_scombo;
+ u32 scombo, prev_scombo, new_scombo;
if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
unlikely(sdom != ld->domain) )
"Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
sflags, sdom, ld->domain);
- sflags |= GTF_reading;
+ /* Merge two 16-bit values into a 32-bit combined update. */
+ /* NB. Endianness! */
+ prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+ new_scombo = scombo | GTF_reading;
if ( !(pin_flags & GNTPIN_readonly) )
{
- sflags |= GTF_writing;
+ new_scombo |= GTF_writing;
if ( unlikely(sflags & GTF_readonly) )
PIN_FAIL(EINVAL,
"Attempt to write-pin a r/o grant entry.\n");
}
- /* Merge two 16-bit values into a 32-bit combined update. */
- /* NB. Endianness! */
- prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
-
- /* NB. prev_sflags is updated in place to seen value. */
- if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
- prev_scombo | GTF_writing)) )
+ /* NB. prev_scombo is updated in place to seen value. */
+ if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
+ prev_scombo,
+ new_scombo)) )
PIN_FAIL(EINVAL,
"Fault while modifying shared flags and domid.\n");
/* Did the combined update work (did we see what we expected?). */
- if ( prev_scombo == scombo )
+ if ( likely(prev_scombo == scombo) )
break;
+ if ( retries++ == 4 )
+ PIN_FAIL(EINVAL,
+ "Shared grant entry is unstable.\n");
+
/* Didn't see what we expected. Split out the seen flags & dom. */
/* NB. Endianness! */
sflags = (u16)prev_scombo;
else if ( act->status & GNTPIN_readonly )
{
sflags = sha->flags;
- do {
- prev_sflags = sflags;
- if ( unlikely(prev_sflags & GTF_readonly) )
+ for ( ; ; )
+ {
+ u16 prev_sflags;
+
+ if ( unlikely(sflags & GTF_readonly) )
PIN_FAIL(EINVAL,
"Attempt to write-pin a r/o grant entry.\n");
PIN_FAIL(EINVAL,
"Attempt to write-pin a unwritable page.\n");
+ prev_sflags = sflags;
+
/* NB. prev_sflags is updated in place to seen value. */
if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
prev_sflags | GTF_writing)) )
PIN_FAIL(EINVAL,
"Fault while modifying shared flags.\n");
+
+ if ( likely(prev_sflags == sflags) )
+ break;
+
+ if ( retries++ == 4 )
+ PIN_FAIL(EINVAL,
+ "Shared grant entry is unstable.\n");
+
+ sflags = prev_sflags;
}
- while ( prev_sflags != sflags );
}
/* Update status word -- this includes device accessibility. */
return rc;
}
+/*
+ * GNTTABOP_setup_table handler: report the machine frame(s) backing the
+ * grant table of domain <op.dom> into the guest-supplied <op.frame_list>.
+ * Returns 0 on success, or -EFAULT/-EINVAL/-EPERM/-ESRCH on failure.
+ */
+static long
+gnttab_setup_table(
+ gnttab_setup_table_t *uop)
+{
+ gnttab_setup_table_t op;
+ struct domain *d;
+
+ if ( unlikely(__copy_from_user(&op, uop, sizeof(op)) != 0) )
+ {
+ DPRINTK("Fault while reading gnttab_setup_table_t.\n");
+ return -EFAULT;
+ }
+
+ if ( unlikely(op.nr_frames > 1) )
+ {
+ DPRINTK("Xen only supports one grant-table frame per domain.\n");
+ return -EINVAL;
+ }
+
+ if ( op.dom == DOMID_SELF )
+ {
+ op.dom = current->domain;
+ }
+ else if ( unlikely(op.dom != current->domain) &&
+ unlikely(!IS_PRIV(current)) )
+ {
+ /* Interface spec: only a privileged domain may name another domain. */
+ DPRINTK("Non-privileged attempt to specify dom != DOMID_SELF.\n");
+ return -EPERM;
+ }
+
+ if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+ {
+ DPRINTK("Bad domid %d.\n", op.dom);
+ return -ESRCH;
+ }
+
+ if ( op.nr_frames == 1 )
+ {
+ ASSERT(d->grant_table != NULL);
+
+ if ( unlikely(put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
+ &op.frame_list[0])) )
+ {
+ DPRINTK("Fault while writing frame list.\n");
+ put_domain(d);
+ return -EFAULT;
+ }
+ }
+
+ put_domain(d);
+ return 0;
+}
+
+
long
do_grant_table_op(
gnttab_op_t *uop)
case GNTTABOP_update_pin_status:
rc = gnttab_update_pin_status(&uop->u.update_pin_status);
break;
+ case GNTTABOP_setup_table:
+ rc = gnttab_setup_table(&uop->u.setup_table);
+ break;
default:
rc = -ENOSYS;
break;
int
gnttab_try_map(
- struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
+ struct domain *rd, struct domain *ld, unsigned long frame, int op)
{
+ grant_table_t *t;
+ active_grant_entry_t *a;
+ u16 *ph, h;
+
+ if ( unlikely((t = rd->grant_table) == NULL) )
+ return 0;
+
+ spin_lock(&t->lock);
+
+ ph = &t->maphash[GNT_MAPHASH(frame)];
+ while ( (h = *ph) != GNT_MAPHASH_INVALID )
+ {
+ if ( (a = &t->active[*ph])->frame != frame )
+ goto found;
+ ph = &a->next;
+ }
+
+ fail:
+ spin_unlock(&t->lock);
return 0;
+
+ found:
+ if ( !(a->status & GNTPIN_host_accessible) )
+ goto fail;
+
+ switch ( op )
+ {
+ case GNTTAB_MAP_RO:
+ if ( (a->status & GNTPIN_rmap_mask) == GNTPIN_rmap_mask )
+ goto fail;
+ a->status += 1 << GNTPIN_rmap_shift;
+ break;
+
+ case GNTTAB_MAP_RW:
+ if ( (a->status & GNTPIN_wmap_mask) == GNTPIN_wmap_mask )
+ goto fail;
+ a->status += 1 << GNTPIN_wmap_shift;
+ break;
+
+ case GNTTAB_UNMAP_RO:
+ if ( (a->status & GNTPIN_rmap_mask) == 0 )
+ goto fail;
+ a->status -= 1 << GNTPIN_rmap_shift;
+ break;
+
+ case GNTTAB_UNMAP_RW:
+ if ( (a->status & GNTPIN_wmap_mask) == 0 )
+ goto fail;
+ a->status -= 1 << GNTPIN_wmap_shift;
+ break;
+
+ default:
+ BUG();
+ }
+
+ spin_unlock(&t->lock);
+ return 1;
}
-int
-gnttab_try_unmap(
- struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
+/*
+ * Check that grant reference (rd,ref) permits 'ld' to transfer ownership
+ * of a page frame into rd's grant table. On success the shared entry is
+ * marked GTF_transfer_committed, locking it against guest modification
+ * until the transferred frame address is written (gnttab_notify_transfer).
+ * Returns 1 if the transfer may proceed, 0 otherwise.
+ */
+int
+gnttab_prepare_for_transfer(
+ struct domain *rd, struct domain *ld, grant_ref_t ref)
{
+ grant_table_t *t;
+ grant_entry_t *e;
+ domid_t sdom;
+ u16 sflags;
+ u32 scombo, prev_scombo;
+ /* Bound CMPXCHG retries on guest-shared memory to avoid livelock. */
+ int retries = 0;
+
+ if ( unlikely((t = rd->grant_table) == NULL) ||
+ unlikely(ref >= NR_GRANT_ENTRIES) )
+ {
+ DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->domain, ref);
+ return 0;
+ }
+
+ spin_lock(&t->lock);
+
+ e = &t->shared[ref];
+
+ sflags = e->flags;
+ sdom = e->domid;
+
+ for ( ; ; )
+ {
+ /* Entry must be exactly GTF_accept_transfer (not yet committed)
+ * and must name 'ld' as the domain allowed to transfer. */
+ if ( unlikely(sflags != GTF_accept_transfer) ||
+ unlikely(sdom != ld->domain) )
+ {
+ DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+ sflags, sdom, ld->domain);
+ goto fail;
+ }
+
+ /* Merge two 16-bit values into a 32-bit combined update. */
+ /* NB. Endianness! */
+ prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+ /* NB. prev_scombo is updated in place to seen value. */
+ if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo,
+ prev_scombo | GTF_transfer_committed)) )
+ {
+ DPRINTK("Fault while modifying shared flags and domid.\n");
+ goto fail;
+ }
+
+ /* Did the combined update work (did we see what we expected?). */
+ if ( likely(prev_scombo == scombo) )
+ break;
+
+ if ( retries++ == 4 )
+ {
+ DPRINTK("Shared grant entry is unstable.\n");
+ goto fail;
+ }
+
+ /* Didn't see what we expected. Split out the seen flags & dom. */
+ /* NB. Endianness! */
+ sflags = (u16)prev_scombo;
+ sdom = (u16)(prev_scombo >> 16);
+ }
+
+ spin_unlock(&t->lock);
+ return 1;
+
+ fail:
+ spin_unlock(&t->lock);
return 0;
}
+/*
+ * Notify 'rd' of a completed transfer via an already-committed grant entry
+ * (see gnttab_prepare_for_transfer). Writing the frame address releases the
+ * entry back to the guest, which may be spin-waiting for it to become
+ * non-zero.
+ */
+void
+gnttab_notify_transfer(
+ struct domain *rd, grant_ref_t ref, unsigned long frame)
+{
+ wmb(); /* Ensure that the reassignment is globally visible. */
+ rd->grant_table->shared[ref].frame = frame;
+}
+
+
int
grant_table_create(
struct domain *d)
* 3. Write memory barrier (WMB).
* 4. Write ent->flags, inc. valid type.
*
- * Removing an unused GTF_permit_access entry:
+ * Invalidating an unused GTF_permit_access entry:
* 1. flags = ent->flags.
* 2. Observe that !(flags & (GTF_reading|GTF_writing)).
* 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
* NB. No need for WMB as reuse of entry is control-dependent on success of
* step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ * This cannot be done directly. Request assistance from the domain controller
+ * which can set a timeout on the use of a grant entry and take necessary
+ * action. (NB. This is not yet implemented!).
*
- * Removing an unused GTF_accept_transfer entry:
- * 1. Check result of SMP-safe CMPXCHG(&ent->frame, 0, <any non-zero value>).
- * 2. Clear ent->flags.
- * 3. WMB (ordering of step 2 vs. steps 1,2 of introducing a new entry).
- *
+ * Invalidating an unused GTF_accept_transfer entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & GTF_transfer_committed). [*]
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ * The guest must /not/ modify the grant entry until the address of the
+ * transferred frame is written. It is safe for the guest to spin waiting
+ * for this to occur (detect by observing non-zero value in ent->frame).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ * 1. Wait for ent->frame != 0.
+ *
* Changing a GTF_permit_access from writable to read-only:
* Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
*
#define _GTF_writing (4)
#define GTF_writing (1<<_GTF_writing)
+/*
+ * Subflags for GTF_accept_transfer:
+ * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ * to transferring ownership of a page frame. When a guest sees this flag
+ * it must /not/ modify the grant entry until the address of the
+ * transferred frame is written into the entry.
+ * NB. It is safe for the guest to spin-wait on the frame address:
+ * Xen will always write the frame address in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed (1<<_GTF_transfer_committed)
+
/***********************************
* GRANT TABLE QUERIES AND USES
MEMORY_PADDING;
} PACKED gnttab_update_pin_status_t; /* 16 bytes */
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ * 3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table 1
+typedef struct {
+ /* IN parameters. */
+ domid_t dom; /* 0 */
+ u16 nr_frames; /* 2 */
+ u32 __pad;
+ /* OUT parameters. */
+ unsigned long *frame_list; /* 8 */
+ MEMORY_PADDING;
+} PACKED gnttab_setup_table_t; /* 16 bytes */
+
typedef struct {
u32 cmd; /* GNTTABOP_* */ /* 0 */
u32 __reserved; /* 4 */
union { /* 8 */
gnttab_update_pin_status_t update_pin_status;
+ gnttab_setup_table_t setup_table;
u8 __dummy[16];
} PACKED u;
} PACKED gnttab_op_t; /* 24 bytes */
* ptr[:2] -- Linear address of LDT base (NB. must be page-aligned).
* val[:8] -- Number of entries in LDT.
*
+ * val[7:0] == MMUEXT_TRANSFER_PAGE:
+ * val[31:16] -- Domain to whom page is to be transferred.
+ * (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table.
+ * ptr[:12] -- Page frame to be reassigned to the FD.
+ * (NB. The frame must currently belong to the calling domain).
+ *
* val[7:0] == MMUEXT_SET_FOREIGNDOM:
- * val[31:15] -- Domain to set as the Foreign Domain (FD).
+ * val[31:16] -- Domain to set as the Foreign Domain (FD).
* (NB. DOMID_SELF is not recognised)
* If FD != DOMID_IO then the caller must be privileged.
*
+ * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
+ * Clears the FD.
+ *
* val[7:0] == MMUEXT_REASSIGN_PAGE:
* ptr[:2] -- A machine address within the page to be reassigned to the FD.
* (NB. page must currently belong to the calling domain).
- *
- * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
- * Clears the FD.
*/
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */
#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */
#define MMUEXT_INVLPG 7 /* ptr = VA to invalidate */
#define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */
-#define MMUEXT_SET_FOREIGNDOM 9 /* val[31:15] = dom */
-#define MMUEXT_REASSIGN_PAGE 10
+#define MMUEXT_TRANSFER_PAGE 9 /* ptr = MA of frame; val[31:16] = dom */
+#define MMUEXT_SET_FOREIGNDOM 10 /* val[31:16] = dom */
#define MMUEXT_CLEAR_FOREIGNDOM 11
+#define MMUEXT_REASSIGN_PAGE 12
#define MMUEXT_CMD_MASK 255
#define MMUEXT_CMD_SHIFT 8
struct domain *d);
/* Create/destroy host-CPU mappings via a grant-table entry. */
+#define GNTTAB_MAP_RO 0
+#define GNTTAB_MAP_RW 1
+#define GNTTAB_UNMAP_RO 2
+#define GNTTAB_UNMAP_RW 3
int gnttab_try_map(
- struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
-int gnttab_try_unmap(
- struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
+ struct domain *rd, struct domain *ld, unsigned long frame, int op);
+
+/*
+ * Check that the given grant reference (rd,ref) allows 'ld' to transfer
+ * ownership of a page frame. If so, lock down the grant entry.
+ */
+int
+gnttab_prepare_for_transfer(
+ struct domain *rd, struct domain *ld, grant_ref_t ref);
+
+/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
+void
+gnttab_notify_transfer(
+ struct domain *rd, grant_ref_t ref, unsigned long frame);
#endif /* __XEN_GRANT_H__ */